package com.bestv.search.engine.compass.spellcheck;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Levenshtein edit distance class.
 */
public final class LevensteinDistance implements StringDistance {

    /**
     * Optimized to run a bit faster than the static getDistance(). In one
     * benchmark times were 5.3sec using ctr vs 8.5sec w/ static method, thus
     * 37% faster.
     */
    public LevensteinDistance() {
    }

    // *****************************
    // Compute Levenshtein distance: see
    // org.apache.commons.lang.StringUtils#getLevenshteinDistance(String,
    // String)
    // *****************************
    public float getDistance(String target, String other) {
        char[] sa;
        int n;
        int p[]; // 'previous' cost array, horizontally
        int d[]; // cost array, horizontally
        int _d[]; // placeholder to assist in swapping p and d

        /*
         * The difference between this impl. and the previous is that, rather
         * than creating and retaining a matrix of size s.length()+1 by
         * t.length()+1, we maintain two single-dimensional arrays of length
         * s.length()+1. The first, d, is the 'current working' distance array
         * that maintains the newest distance cost counts as we iterate through
         * the characters of String s. Each time we increment the index of
         * String t we are comparing, d is copied to p, the second int[]. Doing
         * so allows us to retain the previous cost counts as required by the
         * algorithm (taking the minimum of the cost count to the left, up one,
         * and diagonally up and to the left of the current cost count being
         * calculated). (Note that the arrays aren't really copied anymore, just
         * switched...this is clearly much better than cloning an array or doing
         * a System.arraycopy() each time through the outer loop.)
         *
         * Effectively, the difference between the two implementations is this
         * one does not cause an out of memory condition when calculating the LD
         * over two very large strings.
         */

        sa = target.toCharArray();
        n = sa.length;
        p = new int[n + 1];
        d = new int[n + 1];

        final int m = other.length();

        if (n == 0) {
            return 1;
        } else if (m == 0) {
            return 1;
        }

        // indexes into strings s and t
        int i; // iterates through s
        int j; // iterates through t

        char t_j; // jth character of t

        int cost; // cost

        for (i = 0; i <= n; i++) {
            p[i] = i;
        }

        for (j = 1; j <= m; j++) {
            t_j = other.charAt(j - 1);
            d[0] = j;

            for (i = 1; i <= n; i++) {
                cost = sa[i - 1] == t_j ? 0 : 1;
                // minimum of cell to the left+1, to the top+1, diagonally left
                // and up +cost
                d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost);
            }

            // copy current distance counts to 'previous row' distance counts
            _d = p;
            p = d;
            d = _d;
        }

        // our last action in the above loop was to switch d and p, so p now
        // actually has the most recent cost counts
        return 1.0f - ((float) p[n] / Math.max(other.length(), sa.length));
    }

}
